@article{DBLP:journals/corr/BahdanauCSBB15,
  author        = {Dzmitry Bahdanau and
                   Jan Chorowski and
                   Dmitriy Serdyuk and
                   Philemon Brakel and
                   Yoshua Bengio},
  title         = {End-to-End Attention-based Large Vocabulary Speech Recognition},
  journal       = {CoRR},
  volume        = {abs/1508.04395},
  year          = {2015},
  url           = {http://arxiv.org/abs/1508.04395},
  archivePrefix = {arXiv},
  eprint        = {1508.04395},
  timestamp     = {Mon, 13 Aug 2018 16:47:35 +0200},
  biburl        = {https://dblp.org/rec/journals/corr/BahdanauCSBB15.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}


@misc{NWPUThesisLaTeXTemplate,
  author = {Shangkun Shen and Zhihe Wang and Jiduo Zhang and Weijia Zhang},
  title  = {{{\LaTeX}}-Template-For-NPU-Thesis},
  year   = {2016},
  month  = may,
}

@book{knuth1986the,
  author    = {Knuth, Donald E},
  title     = {The {{\TeX}}book},
  publisher = {Addison-Wesley},
  year      = {1986},
}

@book{lamport1989latex:,
  author    = {Lamport, Leslie},
  title     = {{{\LaTeX}}: a document preparation system},
  publisher = {Addison-Wesley Professional},
  year      = {1989},
}


% CVPR 2015 conference paper: typed as inproceedings with the venue in booktitle.
@inproceedings{szegedy2015going,
  author    = {Szegedy, Christian and Liu, Wei and Jia, Yangqing and Sermanet, Pierre and Reed, Scott E and Anguelov, Dragomir and Erhan, Dumitru and Vanhoucke, Vincent and Rabinovich, Andrew},
  title     = {Going deeper with convolutions},
  booktitle = {Proceedings of the {IEEE} Conference on Computer Vision and Pattern Recognition},
  pages     = {1--9},
  year      = {2015},
}

@misc{MathSymbolsinLaTeXbypolossk,
  author = {Shangkun Shen},
  title  = {Math-Symbols-in-{{\LaTeX}}},
  year   = {2017},
  month  = oct,
}
% This file was created with JabRef 2.10b2.
% Encoding: UTF-8

@article{Hinton2012Deep,
  author  = {Hinton, Geoffrey and Deng, Li and Yu, Dong and Dahl, George E. and Mohamed, Abdel-rahman and Jaitly, Navdeep and Senior, Andrew and Vanhoucke, Vincent and Nguyen, Patrick and Sainath, Tara N. and Kingsbury, Brian},
  title   = {Deep Neural Networks for Acoustic Modeling in Speech Recognition: The Shared Views of Four Research Groups},
  journal = {IEEE Signal Processing Magazine},
  volume  = {29},
  number  = {6},
  pages   = {82--97},
  year    = {2012},
}

@article{dahl2012context,
  author    = {Dahl, George E and Yu, Dong and Deng, Li and Acero, Alex},
  title     = {Context-dependent pre-trained deep neural networks for large-vocabulary speech recognition},
  journal   = {IEEE Transactions on audio, speech, and language processing},
  volume    = {20},
  number    = {1},
  pages     = {30--42},
  year      = {2012},
  publisher = {IEEE},
}

@inproceedings{goodfellow2014generative,
  author    = {Goodfellow, Ian and Pouget-Abadie, Jean and Mirza, Mehdi and Xu, Bing and Warde-Farley, David and Ozair, Sherjil and Courville, Aaron and Bengio, Yoshua},
  title     = {Generative adversarial nets},
  booktitle = {Advances in neural information processing systems},
  pages     = {2672--2680},
  year      = {2014},
}

% JMLR volume 17, paper 59, pages 1-35; the exported range 2096--2030 was invalid (end before start).
@article{ganin2016domain,
  author    = {Ganin, Yaroslav and Ustinova, Evgeniya and Ajakan, Hana and Germain, Pascal and Larochelle, Hugo and Laviolette, Fran{\c{c}}ois and Marchand, Mario and Lempitsky, Victor},
  title     = {Domain-adversarial training of neural networks},
  journal   = {Journal of Machine Learning Research},
  volume    = {17},
  number    = {59},
  pages     = {1--35},
  year      = {2016},
  publisher = {JMLR.org},
}

@inproceedings{panayotov2015librispeech,
  author       = {Panayotov, Vassil and Chen, Guoguo and Povey, Daniel and Khudanpur, Sanjeev},
  title        = {Librispeech: an {ASR} corpus based on public domain audio books},
  booktitle    = {2015 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  pages        = {5206--5210},
  year         = {2015},
  organization = {IEEE},
}

@article{szegedy2013intriguing,
  author        = {Szegedy, Christian and Zaremba, Wojciech and Sutskever, Ilya and Bruna, Joan and Erhan, Dumitru and Goodfellow, Ian and Fergus, Rob},
  title         = {Intriguing properties of neural networks},
  journal       = {CoRR},
  volume        = {abs/1312.6199},
  year          = {2013},
  url           = {https://arxiv.org/abs/1312.6199},
  archivePrefix = {arXiv},
  eprint        = {1312.6199},
}
@article{Graves2012Sequence,
  author        = {Graves, Alex},
  title         = {Sequence Transduction with Recurrent Neural Networks},
  journal       = {CoRR},
  volume        = {abs/1211.3711},
  year          = {2012},
  url           = {https://arxiv.org/abs/1211.3711},
  archivePrefix = {arXiv},
  eprint        = {1211.3711},
}

@article{hansen1996analysis,
  author    = {Hansen, John H. L.},
  title     = {Analysis and compensation of speech under stress and noise for environmental robustness in speech recognition},
  journal   = {Speech communication},
  volume    = {20},
  number    = {1-2},
  pages     = {151--173},
  year      = {1996},
  publisher = {Elsevier},
}

@book{loizou2007speech,
  author    = {Loizou, Philipos C},
  title     = {Speech enhancement: theory and practice},
  publisher = {CRC press},
  year      = {2007},
}
@article{ephraim1984speech,
  author    = {Ephraim, Yariv and Malah, David},
  title     = {Speech enhancement using a minimum-mean square error short-time spectral amplitude estimator},
  journal   = {IEEE Transactions on acoustics, speech, and signal processing},
  volume    = {32},
  number    = {6},
  pages     = {1109--1121},
  year      = {1984},
  publisher = {IEEE},
}

@book{benesty2006speech,
  author    = {Benesty, Jacob and Makino, Shoji and Chen, Jingdong},
  title     = {Speech enhancement},
  publisher = {Springer Science \& Business Media},
  year      = {2006},
}
@article{cohen2002noise,
  author    = {Cohen, Israel and Berdugo, Baruch},
  title     = {Noise estimation by minima controlled recursive averaging for robust speech enhancement},
  journal   = {IEEE signal processing letters},
  volume    = {9},
  number    = {1},
  pages     = {12--15},
  year      = {2002},
  publisher = {IEEE},
}

@book{benesty2008microphone,
  author    = {Benesty, Jacob and Chen, Jingdong and Huang, Yiteng},
  title     = {Microphone array signal processing},
  volume    = {1},
  publisher = {Springer Science \& Business Media},
  year      = {2008},
}

@article{kaneda1986adaptive,
  author    = {Kaneda, Yutaka and Ohga, Juro},
  title     = {Adaptive microphone-array system for noise reduction},
  journal   = {IEEE Transactions on Acoustics, Speech, and Signal Processing},
  volume    = {34},
  number    = {6},
  pages     = {1391--1400},
  year      = {1986},
  publisher = {IEEE},
}
@article{mccowan2003microphone,
  author    = {McCowan, Iain A and Bourlard, Herv{\'e}},
  title     = {Microphone array post-filter based on noise field coherence},
  journal   = {IEEE Transactions on Speech and Audio Processing},
  volume    = {11},
  number    = {6},
  pages     = {709--716},
  year      = {2003},
  publisher = {IEEE},
}

@article{xu2014experimental,
  author    = {Xu, Yong and Du, Jun and Dai, Li-Rong and Lee, Chin-Hui},
  title     = {An experimental study on speech enhancement based on deep neural networks},
  journal   = {IEEE Signal processing letters},
  volume    = {21},
  number    = {1},
  pages     = {65--68},
  year      = {2014},
  publisher = {IEEE},
}

@inproceedings{weninger2015speech,
  author       = {Weninger, Felix and Erdogan, Hakan and Watanabe, Shinji and Vincent, Emmanuel and Le Roux, Jonathan and Hershey, John R and Schuller, Bj{\"o}rn},
  title        = {Speech enhancement with {LSTM} recurrent neural networks and its application to noise-robust {ASR}},
  booktitle    = {International Conference on Latent Variable Analysis and Signal Separation},
  pages        = {91--99},
  year         = {2015},
  organization = {Springer},
}

@inproceedings{du2014robust,
  author    = {Du, Jun and Wang, Qing and Gao, Tian and Xu, Yong and Dai, Li-Rong and Lee, Chin-Hui},
  title     = {Robust speech recognition with speech enhanced deep neural networks},
  booktitle = {Fifteenth Annual Conference of the International Speech Communication Association},
  year      = {2014},
}

@inproceedings{gao2015joint,
  author       = {Gao, Tian and Du, Jun and Dai, Li-Rong and Lee, Chin-Hui},
  title        = {Joint training of front-end and back-end deep neural networks for robust speech recognition},
  booktitle    = {2015 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  pages        = {4375--4379},
  year         = {2015},
  organization = {IEEE},
}

@inproceedings{xiao2016deep,
  author       = {Xiao, Xiong and Watanabe, Shinji and Erdogan, Hakan and Lu, Liang and Hershey, John and Seltzer, Michael L and Chen, Guoguo and Zhang, Yu and Mandel, Michael and Yu, Dong},
  title        = {Deep beamforming networks for multi-channel speech recognition},
  booktitle    = {2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  pages        = {5745--5749},
  year         = {2016},
  organization = {IEEE},
}
@article{wang2016joint,
  author    = {Wang, Zhong-Qiu and Wang, DeLiang},
  title     = {A joint training framework for robust automatic speech recognition},
  journal   = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
  volume    = {24},
  number    = {4},
  pages     = {796--806},
  year      = {2016},
  publisher = {IEEE},
}

@inproceedings{ko2015audio,
  author    = {Ko, Tom and Peddinti, Vijayaditya and Povey, Daniel and Khudanpur, Sanjeev},
  title     = {Audio augmentation for speech recognition},
  booktitle = {Sixteenth Annual Conference of the International Speech Communication Association},
  year      = {2015},
}
@inproceedings{ko2017study,
  author       = {Ko, Tom and Peddinti, Vijayaditya and Povey, Daniel and Seltzer, Michael L and Khudanpur, Sanjeev},
  title        = {A study on data augmentation of reverberant speech for robust speech recognition},
  booktitle    = {2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  pages        = {5220--5224},
  year         = {2017},
  organization = {IEEE},
}

@article{gales1998maximum,
  author    = {Gales, Mark J. F.},
  title     = {Maximum likelihood linear transformations for {HMM}-based speech recognition},
  journal   = {Computer speech \& language},
  volume    = {12},
  number    = {2},
  pages     = {75--98},
  year      = {1998},
  publisher = {Elsevier},
}
@inproceedings{eide1996parametric,
  author       = {Eide, Ellen and Gish, Herbert},
  title        = {A parametric approach to vocal tract length normalization},
  booktitle    = {1996 IEEE International Conference on Acoustics, Speech, and Signal Processing Conference Proceedings},
  volume       = {1},
  pages        = {346--348},
  year         = {1996},
  organization = {IEEE},
}
% Published at the IEEE ASRU 2011 workshop, so typed as inproceedings rather than techreport.
@inproceedings{povey2011kaldi,
  author    = {Povey, Daniel and Ghoshal, Arnab and Boulianne, Gilles and Burget, Lukas and Glembek, Ondrej and Goel, Nagendra and Hannemann, Mirko and Motlicek, Petr and Qian, Yanmin and Schwarz, Petr and others},
  title     = {The {Kaldi} speech recognition toolkit},
  booktitle = {IEEE 2011 Workshop on Automatic Speech Recognition and Understanding},
  publisher = {IEEE Signal Processing Society},
  year      = {2011},
}
@inproceedings{saon2013speaker,
  author       = {Saon, George and Soltau, Hagen and Nahamoo, David and Picheny, Michael},
  title        = {Speaker adaptation of neural network acoustic models using i-vectors},
  booktitle    = {2013 IEEE Workshop on Automatic Speech Recognition and Understanding},
  pages        = {55--59},
  year         = {2013},
  organization = {IEEE},
}
@inproceedings{swietojanski2014learning,
  author       = {Swietojanski, Pawel and Renals, Steve},
  title        = {Learning hidden unit contributions for unsupervised speaker adaptation of neural network acoustic models},
  booktitle    = {2014 IEEE Spoken Language Technology Workshop (SLT)},
  pages        = {171--176},
  year         = {2014},
  organization = {IEEE},
}
@inproceedings{miao2014towards,
  author    = {Miao, Yajie and Zhang, Hao and Metze, Florian},
  title     = {Towards speaker adaptive training of deep neural network acoustic models},
  booktitle = {Fifteenth Annual Conference of the International Speech Communication Association},
  year      = {2014},
}
@inproceedings{seltzer2013investigation,
  author       = {Seltzer, Michael L and Yu, Dong and Wang, Yongqiang},
  title        = {An investigation of deep neural networks for noise robust speech recognition},
  booktitle    = {2013 IEEE international conference on acoustics, speech and signal processing},
  pages        = {7398--7402},
  year         = {2013},
  organization = {IEEE},
}

@article{qian2016neural,
  author    = {Qian, Yanmin and Tan, Tian and Yu, Dong},
  title     = {Neural network based multi-factor aware joint training for robust speech recognition},
  journal   = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
  volume    = {24},
  number    = {12},
  pages     = {2231--2240},
  year      = {2016},
  publisher = {IEEE},
}
@inproceedings{Mirco2015Contaminated,
  author    = {Ravanelli, Mirco and Omologo, Maurizio},
  title     = {Contaminated speech training methods for robust {DNN-HMM} distant speech recognition},
  booktitle = {INTERSPEECH 2015},
  year      = {2015},
}

@article{IsraelSpeech,
  author  = {Cohen, Israel and Berdugo, Baruch},
  title   = {Speech enhancement for non-stationary noise environments},
  journal = {Signal Processing},
  volume  = {81},
  number  = {11},
  pages   = {2403--2418},
  year    = {2001},
}

@article{LiA,
  author  = {Li, Bo and Sim, Khe Chai},
  title   = {A Spectral Masking Approach to Noise-Robust Speech Recognition Using Deep Neural Networks},
  journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
  volume  = {22},
  number  = {8},
  pages   = {1296--1305},
  year    = {2014},
}
@inproceedings{Li2014An,
  author    = {Li, Bo and Sim, Khe Chai},
  title     = {An ideal hidden-activation mask for deep neural networks based noise-robust speech recognition},
  booktitle = {ICASSP 2014 - 2014 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  year      = {2014},
}

@inproceedings{xue2014singular,
  author       = {Xue, Jian and Li, Jinyu and Yu, Dong and Seltzer, Mike and Gong, Yifan},
  title        = {Singular value decomposition based low-footprint speaker adaptation and personalization for deep neural network},
  booktitle    = {2014 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  pages        = {6359--6363},
  year         = {2014},
  organization = {IEEE},
}

@article{xue2016speaker,
  author    = {Xue, Shaofei and Jiang, Hui and Dai, Lirong and Liu, Qingfeng},
  title     = {Speaker adaptation of hybrid {NN/HMM} model for speech recognition based on singular value decomposition},
  journal   = {Journal of Signal Processing Systems},
  volume    = {82},
  number    = {2},
  pages     = {175--185},
  year      = {2016},
  publisher = {Springer},
}
@inproceedings{mitra2015time,
  author       = {Mitra, Vikramjit and Franco, Horacio},
  title        = {Time-frequency convolutional networks for robust speech recognition},
  booktitle    = {2015 IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
  pages        = {317--323},
  year         = {2015},
  organization = {IEEE},
}

@article{qian2016very,
  author    = {Qian, Yanmin and Bi, Mengxiao and Tan, Tian and Yu, Kai},
  title     = {Very deep convolutional neural networks for noise robust speech recognition},
  journal   = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
  volume    = {24},
  number    = {12},
  pages     = {2263--2276},
  year      = {2016},
  publisher = {IEEE},
}
@article{Abdel2014Convolutional,
  author    = {Abdel-Hamid, Ossama and Mohamed, Abdel-rahman and Jiang, Hui and Deng, Li and Penn, Gerald and Yu, Dong},
  title     = {Convolutional Neural Networks for Speech Recognition},
  journal   = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
  volume    = {22},
  number    = {10},
  pages     = {1533--1545},
  year      = {2014},
  publisher = {IEEE},
}
% Book chapter in Arbib's handbook (MIT Press); the exported volume 3361 / number 10 were
% scraper artefacts and are dropped. NOTE(review): add chapter pages if they are available.
@incollection{LeCun1995Convolutional,
  author    = {LeCun, Yann and Bengio, Yoshua},
  title     = {Convolutional Networks for Images, Speech, and Time Series},
  booktitle = {The Handbook of Brain Theory and Neural Networks},
  editor    = {Arbib, Michael A.},
  publisher = {MIT Press},
  year      = {1995},
}
% ICASSP 2015 paper. Repaired the syntax error after the year field, the stray bracket in
% the title, the comma-separated author list, and the entry type (booktitle needs inproceedings).
@inproceedings{Sainath2015Convolutional,
  author       = {Sainath, Tara N. and Vinyals, Oriol and Senior, Andrew and Sak, Ha{\c{s}}im},
  title        = {Convolutional, Long Short-Term Memory, Fully Connected Deep Neural Networks},
  booktitle    = {2015 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  pages        = {4580--4584},
  year         = {2015},
  organization = {IEEE},
}
% NOTE(review): this title is Alex Graves' PhD thesis (TU Munich, 2008); the "K. Kawakami"
% attribution looks like a scraper artefact. Citation key kept unchanged so existing cites still resolve.
@phdthesis{Kawakami2008Supervised,
  author = {Graves, Alex},
  title  = {Supervised Sequence Labelling with Recurrent Neural Networks},
  school = {Technische Universit{\"a}t M{\"u}nchen},
  year   = {2008},
}

% NOTE(review): booktitle "Computer Science" is an export artefact; confirm the intended venue.
@inproceedings{Sak2014Long,
  author    = {Sak, Ha{\c{s}}im and Senior, Andrew and Beaufays, Fran{\c{c}}oise},
  title     = {Long Short-term Memory Based Recurrent Neural Network Architectures for Large Vocabulary Speech Recognition},
  booktitle = {Computer Science},
  pages     = {338--342},
  year      = {2014},
}


@inproceedings{hirsch2000aurora,
  author    = {Hirsch, Hans-G{\"u}nter and Pearce, David},
  title     = {The {Aurora} experimental framework for the performance evaluation of speech recognition systems under noisy conditions},
  booktitle = {ASR2000-Automatic Speech Recognition: Challenges for the new Millenium ISCA Tutorial and Research Workshop (ITRW)},
  year      = {2000},
}

@article{li2014overview,
  author    = {Li, Jinyu and Deng, Li and Gong, Yifan and Haeb-Umbach, Reinhold},
  title     = {An overview of noise-robust automatic speech recognition},
  journal   = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
  volume    = {22},
  number    = {4},
  pages     = {745--777},
  year      = {2014},
  publisher = {IEEE},
}
% Title restored from the all-caps export (with trailing period) to the book's full title.
@book{yu2016automatic,
  author    = {Yu, Dong and Deng, Li},
  title     = {Automatic Speech Recognition: A Deep Learning Approach},
  publisher = {Springer},
  year      = {2016},
}
@book{li2015robust,
  author    = {Li, Jinyu and Deng, Li and Haeb-Umbach, Reinhold and Gong, Yifan},
  title     = {Robust automatic speech recognition: a bridge to practical applications},
  publisher = {Academic Press},
  year      = {2015},
}

@inproceedings{li2017large,
  author    = {Li, Jinyu and Seltzer, Michael L and Wang, Xi and Zhao, Rui and Gong, Yifan},
  title     = {Large-Scale Domain Adaptation via Teacher-Student Learning},
  booktitle = {Proc. Interspeech 2017},
  pages     = {2386--2390},
  year      = {2017},
}
@inproceedings{markov2016robust,
  author    = {Markov, Konstantin and Matsui, Tomoko},
  title     = {Robust Speech Recognition Using Generalized Distillation Framework},
  booktitle = {Interspeech},
  pages     = {2364--2368},
  year      = {2016},
}

@inproceedings{watanabe2017student,
  author       = {Watanabe, Shinji and Hori, Takaaki and Le Roux, Jonathan and Hershey, John R},
  title        = {Student-teacher network learning with enhanced features},
  booktitle    = {2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  pages        = {5275--5279},
  year         = {2017},
  organization = {IEEE},
}


@article{279278,
  author  = {Gauvain, Jean-Luc and Lee, Chin-Hui},
  title   = {Maximum a posteriori estimation for multivariate {Gaussian} mixture observations of {Markov} chains},
  journal = {IEEE Transactions on Speech and Audio Processing},
  volume  = {2},
  number  = {2},
  pages   = {291--298},
  year    = {1994},
  month   = apr,
}

@inproceedings{chen2015speech,
  author    = {Chen, Zhuo and Watanabe, Shinji and Erdogan, Hakan and Hershey, John R},
  title     = {Speech enhancement and recognition using multi-task learning of long short-term memory recurrent neural networks},
  booktitle = {Sixteenth Annual Conference of the International Speech Communication Association},
  year      = {2015},
}

@inproceedings{evgeniou2004regularized,
  author       = {Evgeniou, Theodoros and Pontil, Massimiliano},
  title        = {Regularized multi-task learning},
  booktitle    = {Proceedings of the tenth ACM SIGKDD international conference on Knowledge discovery and data mining},
  pages        = {109--117},
  year         = {2004},
  organization = {ACM},
}

@inproceedings{sriram2018robust,
  author       = {Sriram, Anuroop and Jun, Heewoo and Gaur, Yashesh and Satheesh, Sanjeev},
  title        = {Robust speech recognition using generative adversarial networks},
  booktitle    = {2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  pages        = {5639--5643},
  year         = {2018},
  organization = {IEEE},
}
@inproceedings{shinohara2016adversarial,
  author    = {Shinohara, Yusuke},
  title     = {Adversarial Multi-Task Learning of Deep Neural Networks for Robust Speech Recognition},
  booktitle = {INTERSPEECH},
  pages     = {2369--2372},
  year      = {2016},
  address   = {San Francisco, CA, USA},
}
@article{sun2017unsupervised,
  author    = {Sun, Sining and Zhang, Binbin and Xie, Lei and Zhang, Yanning},
  title     = {An unsupervised deep domain adaptation approach for robust speech recognition},
  journal   = {Neurocomputing},
  volume    = {257},
  pages     = {79--87},
  year      = {2017},
  publisher = {Elsevier},
}
% Author names corrected against the paper: Yong Zhao and Yifan Gong (exported as Yang / Gang).
@inproceedings{meng2018speaker,
  author       = {Meng, Zhong and Li, Jinyu and Chen, Zhuo and Zhao, Yong and Mazalov, Vadim and Gong, Yifan and Juang, Biing-Hwang},
  title        = {Speaker-invariant training via adversarial learning},
  booktitle    = {2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  pages        = {5969--5973},
  year         = {2018},
  organization = {IEEE},
}

@article{mirza2014conditional,
  author        = {Mirza, Mehdi and Osindero, Simon},
  title         = {Conditional generative adversarial nets},
  journal       = {CoRR},
  volume        = {abs/1411.1784},
  year          = {2014},
  url           = {https://arxiv.org/abs/1411.1784},
  archivePrefix = {arXiv},
  eprint        = {1411.1784},
}
@inproceedings{arjovsky2017wasserstein,
  author    = {Arjovsky, Martin and Chintala, Soumith and Bottou, L{\'e}on},
  title     = {Wasserstein generative adversarial networks},
  booktitle = {International Conference on Machine Learning},
  pages     = {214--223},
  year      = {2017},
}
@article{Ananth1996Maximum,
  author  = {Sankar, Ananth and Lee, Chin-Hui},
  title   = {Maximum-likelihood approach to stochastic matching for robust speech recognition},
  journal = {IEEE Transactions on Speech and Audio Processing},
  volume  = {4},
  number  = {3},
  pages   = {190--202},
  year    = {1996},
}

@inproceedings{liu2016coupled,
  author    = {Liu, Ming-Yu and Tuzel, Oncel},
  title     = {Coupled generative adversarial networks},
  booktitle = {Advances in neural information processing systems},
  pages     = {469--477},
  year      = {2016},
}
@inproceedings{yu2017seqgan,
  author    = {Yu, Lantao and Zhang, Weinan and Wang, Jun and Yu, Yong},
  title     = {{SeqGAN}: Sequence generative adversarial nets with policy gradient},
  booktitle = {Thirty-First AAAI Conference on Artificial Intelligence},
  year      = {2017},
}

@inproceedings{chen2016infogan,
  author    = {Chen, Xi and Duan, Yan and Houthooft, Rein and Schulman, John and Sutskever, Ilya and Abbeel, Pieter},
  title     = {{InfoGAN}: Interpretable representation learning by information maximizing generative adversarial nets},
  booktitle = {Advances in neural information processing systems},
  pages     = {2172--2180},
  year      = {2016},
}

@inproceedings{liu2017adversarial,
  author    = {Liu, Pengfei and Qiu, Xipeng and Huang, Xuanjing},
  title     = {Adversarial Multi-task Learning for Text Classification},
  booktitle = {Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  volume    = {1},
  pages     = {1--10},
  year      = {2017},
}
@article{zhao2017multiple,
  author        = {Zhao, Han and Zhang, Shanghang and Wu, Guanhang and Costeira, Joao P and Moura, Jos{\'e} M. F. and Gordon, Geoffrey J},
  title         = {Multiple source domain adaptation with adversarial training of neural networks},
  journal       = {CoRR},
  volume        = {abs/1705.09684},
  year          = {2017},
  url           = {https://arxiv.org/abs/1705.09684},
  archivePrefix = {arXiv},
  eprint        = {1705.09684},
}
@inproceedings{sun2018domain,
  author       = {Sun, Sining and Yeh, Ching-Feng and Hwang, Mei-Yuh and Ostendorf, Mari and Xie, Lei},
  title        = {Domain adversarial training for accented speech recognition},
  booktitle    = {2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  pages        = {4854--4858},
  year         = {2018},
  organization = {IEEE},
}

@inproceedings{he2017adversarial,
  author    = {He, Warren and Wei, James and Chen, Xinyun and Carlini, Nicholas and Song, Dawn},
  title     = {Adversarial example defense: Ensembles of weak defenses are not strong},
  booktitle = {11th {USENIX} Workshop on Offensive Technologies ({WOOT} 17)},
  year      = {2017},
}

@article{huang2017adversarial,
  author        = {Huang, Sandy and Papernot, Nicolas and Goodfellow, Ian and Duan, Yan and Abbeel, Pieter},
  title         = {Adversarial attacks on neural network policies},
  journal       = {CoRR},
  volume        = {abs/1702.02284},
  year          = {2017},
  url           = {https://arxiv.org/abs/1702.02284},
  archivePrefix = {arXiv},
  eprint        = {1702.02284},
}

@article{goodfellow2014explaining,
  author        = {Goodfellow, Ian J and Shlens, Jonathon and Szegedy, Christian},
  title         = {Explaining and harnessing adversarial examples},
  journal       = {CoRR},
  volume        = {abs/1412.6572},
  year          = {2014},
  url           = {https://arxiv.org/abs/1412.6572},
  archivePrefix = {arXiv},
  eprint        = {1412.6572},
}

@inproceedings{athalye2018synthesizing,
  author    = {Athalye, Anish and Engstrom, Logan and Ilyas, Andrew and Kwok, Kevin},
  title     = {Synthesizing Robust Adversarial Examples},
  booktitle = {International Conference on Machine Learning},
  pages     = {284--293},
  year      = {2018},
}

@article{miyato2018virtual,
  author    = {Miyato, Takeru and Maeda, Shin-ichi and Ishii, Shin and Koyama, Masanori},
  title     = {Virtual adversarial training: a regularization method for supervised and semi-supervised learning},
  journal   = {IEEE transactions on pattern analysis and machine intelligence},
  year      = {2018},
  publisher = {IEEE},
}
% ICLR is a conference, so the venue belongs in booktitle of an inproceedings entry.
@inproceedings{miyato2015distributional,
  author    = {Miyato, Takeru and Maeda, Shin-ichi and Koyama, Masanori and Nakae, Ken and Ishii, Shin},
  title     = {Distributional smoothing with virtual adversarial training},
  booktitle = {International Conference on Learning Representations (ICLR)},
  year      = {2016},
}
@inproceedings{carlini2018audio,
  author       = {Carlini, Nicholas and Wagner, David},
  title        = {Audio adversarial examples: Targeted attacks on speech-to-text},
  booktitle    = {2018 IEEE Security and Privacy Workshops (SPW)},
  pages        = {1--7},
  year         = {2018},
  organization = {IEEE},
}

@inproceedings{cisse2017houdini,
  author    = {Cisse, Moustapha M and Adi, Yossi and Neverova, Natalia and Keshet, Joseph},
  title     = {Houdini: Fooling deep structured visual and speech recognition models with adversarial examples},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {6977--6987},
  year      = {2017},
}

@inproceedings{amodei2016deep,
  author    = {Amodei, Dario and Ananthanarayanan, Sundaram and Anubhai, Rishita and Bai, Jingliang and Battenberg, Eric and Case, Carl and Casper, Jared and Catanzaro, Bryan and Cheng, Qiang and Chen, Guoliang and others},
  title     = {{Deep Speech 2}: End-to-end speech recognition in {English} and {Mandarin}},
  booktitle = {International conference on machine learning},
  pages     = {173--182},
  year      = {2016},
}
@inproceedings{graves2014towards,
  author    = {Graves, Alex and Jaitly, Navdeep},
  title     = {Towards end-to-end speech recognition with recurrent neural networks},
  booktitle = {International Conference on Machine Learning},
  pages     = {1764--1772},
  year      = {2014},
}


@inproceedings{miao2015eesen,
  author       = {Miao, Yajie and Gowayyed, Mohammad and Metze, Florian},
  title        = {{EESEN}: End-to-end speech recognition using deep {RNN} models and {WFST}-based decoding},
  booktitle    = {2015 IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
  pages        = {167--174},
  year         = {2015},
  organization = {IEEE},
}

@article{pascual2017segan,
  author        = {Pascual, Santiago and Bonafonte, Antonio and Serr{\`a}, Joan},
  title         = {{SEGAN}: Speech enhancement generative adversarial network},
  journal       = {CoRR},
  volume        = {abs/1703.09452},
  year          = {2017},
  url           = {https://arxiv.org/abs/1703.09452},
  archivePrefix = {arXiv},
  eprint        = {1703.09452},
}
@inproceedings{donahue2018exploring,
  author       = {Donahue, Chris and Li, Bo and Prabhavalkar, Rohit},
  title        = {Exploring speech enhancement with generative adversarial networks for robust speech recognition},
  booktitle    = {2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  pages        = {5024--5028},
  year         = {2018},
  organization = {IEEE},
}

@inproceedings{wang2018investigating,
  author    = {Wang, Ke and Zhang, Junbo and Sun, Sining and Wang, Yujun and Xiang, Fei and Xie, Lei},
  title     = {Investigating Generative Adversarial Networks Based Speech Dereverberation for Robust Speech Recognition},
  booktitle = {Proc. Interspeech 2018},
  pages     = {1581--1585},
  year      = {2018},
}
@inproceedings{li2018single,
  author    = {Li, Chenxing and Wang, Tieqiang and Xu, Shuang and Xu, Bo},
  title     = {Single-channel Speech Dereverberation via Generative Adversarial Training},
  booktitle = {Proc. Interspeech 2018},
  pages     = {1309--1313},
  year      = {2018},
}
@inproceedings{yang2017statistical,
  author       = {Yang, Shan and Xie, Lei and Chen, Xiao and Lou, Xiaoyan and Zhu, Xuan and Huang, Dongyan and Li, Haizhou},
  title        = {Statistical parametric speech synthesis using generative adversarial networks under a multi-task learning framework},
  booktitle    = {2017 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)},
  pages        = {685--691},
  year         = {2017},
  organization = {IEEE},
}

@inproceedings{kaneko2017generative,
  author       = {Kaneko, Takuhiro and Kameoka, Hirokazu and Hojo, Nobukatsu and Ijima, Yusuke and Hiramatsu, Kaoru and Kashino, Kunio},
  title        = {Generative adversarial network-based postfilter for statistical parametric speech synthesis},
  booktitle    = {2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  pages        = {4910--4914},
  year         = {2017},
  organization = {IEEE},
}
@article{saito2018statistical,
  author    = {Saito, Yuki and Takamichi, Shinnosuke and Saruwatari, Hiroshi},
  title     = {Statistical parametric speech synthesis incorporating generative adversarial networks},
  journal   = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
  volume    = {26},
  number    = {1},
  pages     = {84--96},
  year      = {2018},
  publisher = {IEEE},
}

@inproceedings{fang2019channel,
  author       = {Fang, Xin and Zou, Liang and Li, Jin and Sun, Lei and Ling, Zhen-Hua},
  title        = {Channel adversarial training for cross-channel text-independent speaker recognition},
  booktitle    = {ICASSP 2019-2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  pages        = {6221--6225},
  year         = {2019},
  organization = {IEEE},
}
@inproceedings{wang2018unsupervised,
  author       = {Wang, Qing and Rao, Wei and Sun, Sining and Xie, Lei and Chng, Eng Siong and Li, Haizhou},
  title        = {Unsupervised domain adaptation via domain adversarial training for speaker recognition},
  booktitle    = {2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  pages        = {4889--4893},
  year         = {2018},
  organization = {IEEE},
}
@book{rabiner1978digital,
  author    = {Rabiner, Lawrence R and Schafer, Ronald W},
  title     = {Digital processing of speech signals},
  volume    = {100},
  publisher = {Prentice-Hall},
  address   = {Englewood Cliffs, NJ},
  year      = {1978},
}

@article{rabiner1989tutorial,
  author    = {Rabiner, Lawrence R},
  title     = {A tutorial on hidden {Markov} models and selected applications in speech recognition},
  journal   = {Proceedings of the IEEE},
  volume    = {77},
  number    = {2},
  pages     = {257--286},
  year      = {1989},
  publisher = {IEEE},
}
% Co-author James H. Martin was missing from the exported record.
@book{jurafsky2000speech,
  author    = {Jurafsky, Dan and Martin, James H.},
  title     = {Speech \& language processing},
  publisher = {Pearson Education India},
  year      = {2000},
}
@inproceedings{mikolov2010recurrent,
  author    = {Mikolov, Tom{\'a}{\v{s}} and Karafi{\'a}t, Martin and Burget, Luk{\'a}{\v{s}} and {\v{C}}ernock{\'y}, Jan and Khudanpur, Sanjeev},
  title     = {Recurrent neural network based language model},
  booktitle = {Eleventh annual conference of the international speech communication association},
  year      = {2010},
}
@inproceedings{mikolov2011extensions,
  author       = {Mikolov, Tom{\'a}{\v{s}} and Kombrink, Stefan and Burget, Luk{\'a}{\v{s}} and {\v{C}}ernock{\'y}, Jan and Khudanpur, Sanjeev},
  title        = {Extensions of recurrent neural network language model},
  booktitle    = {2011 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  pages        = {5528--5531},
  year         = {2011},
  organization = {IEEE},
}

@article{mohri2002weighted,
  author    = {Mohri, Mehryar and Pereira, Fernando and Riley, Michael},
  title     = {Weighted finite-state transducers in speech recognition},
  journal   = {Computer Speech \& Language},
  volume    = {16},
  number    = {1},
  pages     = {69--88},
  year      = {2002},
  publisher = {Elsevier},
}
@inproceedings{allauzen2007openfst,
  author       = {Allauzen, Cyril and Riley, Michael and Schalkwyk, Johan and Skut, Wojciech and Mohri, Mehryar},
  title        = {{OpenFst}: A general and efficient weighted finite-state transducer library},
  booktitle    = {International Conference on Implementation and Application of Automata},
  pages        = {11--23},
  year         = {2007},
  organization = {Springer},
}

@incollection{mohri2008speech,
  author    = {Mohri, Mehryar and Pereira, Fernando and Riley, Michael},
  title     = {Speech recognition with weighted finite-state transducers},
  booktitle = {Springer Handbook of Speech Processing},
  pages     = {559--584},
  publisher = {Springer},
  year      = {2008},
}

@inproceedings{barker2015third,
  author       = {Barker, Jon and Marxer, Ricard and Vincent, Emmanuel and Watanabe, Shinji},
  title        = {The third {CHiME} speech separation and recognition challenge: Dataset, task and baselines},
  booktitle    = {2015 IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
  pages        = {504--511},
  year         = {2015},
  organization = {IEEE},
}

@inproceedings{xiao2016study,
  author    = {Xiao, Xiong and Xu, Chenglin and Zhang, Zhaofeng and Zhao, Shengkui and Sun, Sining and Watanabe, Shinji and Wang, Longbiao and Xie, Lei and Jones, Douglas L and Chng, Eng Siong and others},
  title     = {A study of learning based beamforming methods for speech recognition},
  booktitle = {CHiME 2016 workshop},
  pages     = {26--31},
  year      = {2016},
}

@inproceedings{povey2016purely,
  author    = {Povey, Daniel and Peddinti, Vijayaditya and Galvez, Daniel and Ghahremani, Pegah and Manohar, Vimal and Na, Xingyu and Wang, Yiming and Khudanpur, Sanjeev},
  title     = {Purely Sequence-Trained Neural Networks for {ASR} Based on Lattice-Free {MMI}},
  booktitle = {Interspeech},
  pages     = {2751--2755},
  year      = {2016},
}

@article{woodland2002large,
  author    = {Woodland, Philip C and Povey, Daniel},
  title     = {Large scale discriminative training of hidden {Markov} models for speech recognition},
  journal   = {Computer Speech \& Language},
  volume    = {16},
  number    = {1},
  pages     = {25--47},
  year      = {2002},
  publisher = {Elsevier},
}

% ICASSP 2002 volume I uses prefixed page numbers; the exported range "I--105" was truncated.
@inproceedings{povey2002minimum,
  author       = {Povey, Daniel and Woodland, Philip C},
  title        = {Minimum phone error and {I-smoothing} for improved discriminative training},
  booktitle    = {2002 IEEE International Conference on Acoustics, Speech, and Signal Processing},
  volume       = {1},
  pages        = {I-105--I-108},
  year         = {2002},
  organization = {IEEE},
}
% Time-delay neural network (TDNN) paper.
% Volume 37, number 3, pages 328--339 identify the IEEE Transactions journal
% article, so the entry is typed as an article rather than a proceedings paper.
% Authors are separated with " and " as BibTeX requires.
@article{Waibel1989Phoneme,
	author    = {Waibel, Alex and Hanazawa, Toshiyuki and Hinton, Geoffrey and Shikano, Kiyohiro and Lang, Kevin J.},
	title     = {Phoneme Recognition Using Time-Delay Neural Networks},
	journal   = {IEEE Transactions on Acoustics, Speech, and Signal Processing},
	volume    = {37},
	number    = {3},
	pages     = {328--339},
	year      = {1989},
	publisher = {IEEE}
}
% SLT 2016 paper on unifying acoustic models across dialects.
@inproceedings{elfeky2016towards,
	author       = {Elfeky, Mohamed and Bastani, Meysam and Velez, Xavier and Moreno, Pedro and Waters, Austin},
	title        = {Towards acoustic model unification across dialects},
	booktitle    = {2016 IEEE Spoken Language Technology Workshop (SLT)},
	organization = {IEEE},
	year         = {2016},
	pages        = {624--628}
}

% Book chapter version of the physical-world adversarial examples paper.
@incollection{kurakin2018adversarial,
	author    = {Kurakin, Alexey and Goodfellow, Ian J and Bengio, Samy},
	title     = {Adversarial Examples in the Physical World},
	booktitle = {Artificial Intelligence Safety and Security},
	publisher = {Chapman and Hall/CRC},
	year      = {2018},
	pages     = {99--112}
}

% arXiv preprint. Stored in the CoRR/eprint form used by the DBLP entry at the
% top of this file, so the identifier is machine-readable instead of being
% embedded in the journal name.
@article{kurakin2016adversarial,
	author        = {Kurakin, Alexey and Goodfellow, Ian and Bengio, Samy},
	title         = {Adversarial machine learning at scale},
	journal       = {CoRR},
	volume        = {abs/1611.01236},
	year          = {2016},
	url           = {https://arxiv.org/abs/1611.01236},
	archivePrefix = {arXiv},
	eprint        = {1611.01236}
}
% ASIA CCS 2017 paper on black-box attacks.
@inproceedings{papernot2017practical,
	author       = {Papernot, Nicolas and McDaniel, Patrick and Goodfellow, Ian and Jha, Somesh and Celik, Z Berkay and Swami, Ananthram},
	title        = {Practical black-box attacks against machine learning},
	booktitle    = {Proceedings of the 2017 ACM on Asia Conference on Computer and Communications Security},
	organization = {ACM},
	year         = {2017},
	pages        = {506--519}
}
% arXiv preprint. Stored in the CoRR/eprint form used by the DBLP entry at the
% top of this file, so the identifier is machine-readable instead of being
% embedded in the journal name.
@article{tramer2017ensemble,
	author        = {Tram{\`e}r, Florian and Kurakin, Alexey and Papernot, Nicolas and Goodfellow, Ian and Boneh, Dan and McDaniel, Patrick},
	title         = {Ensemble adversarial training: Attacks and defenses},
	journal       = {CoRR},
	volume        = {abs/1705.07204},
	year          = {2017},
	url           = {https://arxiv.org/abs/1705.07204},
	archivePrefix = {arXiv},
	eprint        = {1705.07204}
}

% ICML 2017 paper on Parseval networks.
% The proceedings volume number lives in series/volume rather than being fused
% into the booktitle; "Parseval" is braced as a proper noun.
@inproceedings{cisse2017parseval,
	author    = {Cisse, Moustapha and Bojanowski, Piotr and Grave, Edouard and Dauphin, Yann and Usunier, Nicolas},
	title     = {{Parseval} networks: Improving robustness to adversarial examples},
	booktitle = {Proceedings of the 34th International Conference on Machine Learning},
	series    = {Proceedings of Machine Learning Research},
	volume    = {70},
	pages     = {854--863},
	year      = {2017},
	publisher = {PMLR}
}
% IEEE S&P 2016 paper on defensive distillation.
@inproceedings{papernot2016distillation,
	author       = {Papernot, Nicolas and McDaniel, Patrick and Wu, Xi and Jha, Somesh and Swami, Ananthram},
	title        = {Distillation as a defense to adversarial perturbations against deep neural networks},
	booktitle    = {2016 IEEE Symposium on Security and Privacy (SP)},
	organization = {IEEE},
	year         = {2016},
	pages        = {582--597}
}


% ICML 2013 paper introducing maxout networks.
@inproceedings{goodfellow2013maxout,
	author    = {Goodfellow, Ian and Warde-Farley, David and Mirza, Mehdi and Courville, Aaron and Bengio, Yoshua},
	title     = {Maxout Networks},
	booktitle = {International Conference on Machine Learning},
	year      = {2013},
	pages     = {1319--1327}
}

% Interspeech 2014 (the 15th annual ISCA conference) paper on small-footprint DNNs.
% The acronym is braced against sentence-casing; the proceedings name is given
% in title case because styles do not recase booktitle.
@inproceedings{li2014learning,
	author    = {Li, Jinyu and Zhao, Rui and Huang, Jui-Ting and Gong, Yifan},
	title     = {Learning small-size {DNN} with output-distribution-based criteria},
	booktitle = {Fifteenth Annual Conference of the International Speech Communication Association},
	year      = {2014}
}

% arXiv preprint. Stored in the CoRR/eprint form used by the DBLP entry at the
% top of this file, so the identifier is machine-readable instead of being
% embedded in the journal name.
@article{yi2018distilling,
	author        = {Yi, Jiangyan and Tao, Jianhua and Wen, Zhengqi and Liu, Bin},
	title         = {Distilling knowledge using parallel data for far-field speech recognition},
	journal       = {CoRR},
	volume        = {abs/1802.06941},
	year          = {2018},
	url           = {https://arxiv.org/abs/1802.06941},
	archivePrefix = {arXiv},
	eprint        = {1802.06941}
}
% arXiv preprint. Stored in the CoRR/eprint form used by the DBLP entry at the
% top of this file, so the identifier is machine-readable instead of being
% embedded in the journal name.
@article{hinton2015distilling,
	author        = {Hinton, Geoffrey and Vinyals, Oriol and Dean, Jeff},
	title         = {Distilling the knowledge in a neural network},
	journal       = {CoRR},
	volume        = {abs/1503.02531},
	year          = {2015},
	url           = {https://arxiv.org/abs/1503.02531},
	archivePrefix = {arXiv},
	eprint        = {1503.02531}
}

% ICASSP 2018 paper on teacher-student learning for far-field speech.
@inproceedings{li2018developing,
	author       = {Li, Jinyu and Zhao, Rui and Chen, Zhuo and Liu, Changliang and Xiao, Xiong and Ye, Guoli and Gong, Yifan},
	title        = {Developing far-field speaker system via teacher-student learning},
	booktitle    = {2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
	organization = {IEEE},
	year         = {2018},
	pages        = {5699--5703}
}

% ICASSP 2018 paper on adversarial teacher-student domain adaptation.
@inproceedings{meng2018adversarial,
	author       = {Meng, Zhong and Li, Jinyu and Gong, Yifan and Juang, Biing-Hwang},
	title        = {Adversarial teacher-student learning for unsupervised domain adaptation},
	booktitle    = {2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
	organization = {IEEE},
	year         = {2018},
	pages        = {5949--5953}
}

% Description of the 4th CHiME challenge.
% The acronym is braced against sentence-casing; Watanabe's given name is
% spelled out to match the other entries in this file that cite him.
% NOTE(review): "CHiME CHALLENGE" is not a journal; consider a misc entry with
% the challenge URL once the intended source is confirmed.
@article{vincent20164th,
	author  = {Vincent, Emmanuel and Watanabe, Shinji and Barker, Jon and Marxer, Ricard},
	title   = {The 4th {CHiME} speech separation and recognition challenge},
	journal = {CHiME CHALLENGE},
	year    = {2016}
}

% Survey chapter on eigenvalue computation.
@incollection{golub2001eigenvalue,
	author    = {Golub, Gene H and Van der Vorst, Henk A},
	title     = {Eigenvalue computation in the 20th century},
	booktitle = {Numerical analysis: historical developments in the 20th century},
	publisher = {Elsevier},
	year      = {2001},
	pages     = {209--239}
}
% O-COCOSDA 2017 paper introducing the AISHELL-1 corpus.
% The corpus name is written in its all-caps form (matching the AISHELL-2 entry
% in this file) and braced, together with "Mandarin", against sentence-casing.
@inproceedings{bu2017aishell,
	author       = {Bu, Hui and Du, Jiayu and Na, Xingyu and Wu, Bengu and Zheng, Hao},
	title        = {{AISHELL-1}: An open-source {Mandarin} speech corpus and a speech recognition baseline},
	booktitle    = {2017 20th Conference of the Oriental Chapter of the International Coordinating Committee on Speech Databases and Speech I/O Systems and Assessment (O-COCOSDA)},
	pages        = {1--5},
	year         = {2017},
	organization = {IEEE}
}

% arXiv preprint. Stored in the CoRR/eprint form used by the DBLP entry at the
% top of this file; acronyms and proper nouns in the title are braced.
@article{du2018aishell,
	author        = {Du, Jiayu and Na, Xingyu and Liu, Xuechen and Bu, Hui},
	title         = {{AISHELL-2}: Transforming {Mandarin} {ASR} Research Into Industrial Scale},
	journal       = {CoRR},
	volume        = {abs/1808.10583},
	year          = {2018},
	url           = {https://arxiv.org/abs/1808.10583},
	archivePrefix = {arXiv},
	eprint        = {1808.10583}
}
% arXiv preprint. Stored in the CoRR/eprint form used by the DBLP entry at the
% top of this file; the corpus name is an acronym and is braced in upper case.
@article{snyder2015musan,
	author        = {Snyder, David and Chen, Guoguo and Povey, Daniel},
	title         = {{MUSAN}: A music, speech, and noise corpus},
	journal       = {CoRR},
	volume        = {abs/1510.08484},
	year          = {2015},
	url           = {https://arxiv.org/abs/1510.08484},
	archivePrefix = {arXiv},
	eprint        = {1510.08484}
}

% Interspeech 2018 paper on attention-based sequence-to-sequence ASR.
% "English" is braced so sentence-casing styles keep the capital.
@inproceedings{weng2018improving,
	author    = {Weng, Chao and Cui, Jia and Wang, Guangsen and Wang, Jun and Yu, Chengzhu and Su, Dan and Yu, Dong},
	title     = {Improving attention based sequence-to-sequence models for end-to-end {English} conversational speech recognition},
	booktitle = {Interspeech 2018},
	year      = {2018}
}

% Interspeech 2018 paper on factorized (semi-orthogonal) TDNN layers.
% Names are separated with " and "; the truncated tail uses the "and others"
% token so the style renders "et al." itself.
@inproceedings{Povey2018Semi,
	author    = {Povey, Daniel and Cheng, Gaofeng and Wang, Yiming and others},
	title     = {Semi-Orthogonal Low-Rank Matrix Factorization for Deep Neural Networks},
	booktitle = {Interspeech 2018},
	pages     = {3743--3747},
	year      = {2018}
}
% NIPS 2015 paper on scheduled sampling.
@inproceedings{bengio2015scheduled,
	author    = {Bengio, Samy and Vinyals, Oriol and Jaitly, Navdeep and Shazeer, Noam},
	title     = {Scheduled sampling for sequence prediction with recurrent neural networks},
	booktitle = {Advances in Neural Information Processing Systems},
	year      = {2015},
	pages     = {1171--1179}
}

% NIPS 2017 Transformer paper.
% Names are given as "Last, First" separated by " and "; the truncated tail
% uses the "and others" token so the style renders "et al." itself.
@inproceedings{Vaswani2017Attention,
	author    = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and others},
	title     = {Attention Is All You Need},
	booktitle = {Advances in Neural Information Processing Systems},
	pages     = {5998--6008},
	year      = {2017}
}
% arXiv preprint. Stored in the CoRR/eprint form used by the DBLP entry at the
% top of this file; the method name is braced to keep its camel case.
@article{park2019specaugment,
	author        = {Park, Daniel S and Chan, William and Zhang, Yu and Chiu, Chung-Cheng and Zoph, Barret and Cubuk, Ekin D and Le, Quoc V},
	title         = {{SpecAugment}: A Simple Data Augmentation Method for Automatic Speech Recognition},
	journal       = {CoRR},
	volume        = {abs/1904.08779},
	year          = {2019},
	url           = {https://arxiv.org/abs/1904.08779},
	archivePrefix = {arXiv},
	eprint        = {1904.08779}
}

% ICASSP 2014 paper on DNN-based keyword spotting.
@inproceedings{chen2014small,
	author       = {Chen, Guoguo and Parada, Carolina and Heigold, Georg},
	title        = {Small-footprint keyword spotting using deep neural networks},
	booktitle    = {2014 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
	organization = {IEEE},
	year         = {2014},
	pages        = {4087--4091}
}
% ICASSP 2012 paper on Mandarin-English code-switching ASR.
% Authors are written "Last, First" and separated with " and "; a comma-separated
% list is parsed by BibTeX as a single malformed name.
@inproceedings{Ngoc2012first,
	author       = {Vu, Ngoc Thang and Lyu, Dau-Cheng and Weiner, Jochen and Telaar, Dominic and Schlippe, Tim and Blaicher, Fabian and Chng, Eng-Siong and Schultz, Tanja and Li, Haizhou},
	title        = {A first speech recognition system for {Mandarin-English} code-switch conversational speech},
	booktitle    = {2012 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
	pages        = {4889--4892},
	year         = {2012},
	organization = {IEEE}
}
% ICASSP 2013 paper on mixed-language speech recognition.
% Two authors (Ying Li and Pascale Fung), separated with " and ".
@inproceedings{Ying2013Improved,
	author       = {Li, Ying and Fung, Pascale},
	title        = {Improved mixed language speech recognition using asymmetric acoustic model and language model with code-switch inversion constraints},
	booktitle    = {2013 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
	pages        = {7368--7372},
	year         = {2013},
	organization = {IEEE}
}
% Interspeech 2018 conference paper; typed as a proceedings paper with the
% venue in booktitle, matching the other "Proc. Interspeech 2018" entries.
@inproceedings{shan2018attention,
	author    = {Shan, Changhao and Zhang, Junbo and Wang, Yujun and Xie, Lei},
	title     = {Attention-based End-to-End Models for Small-Footprint Keyword Spotting},
	booktitle = {Proc. Interspeech 2018},
	pages     = {2037--2041},
	year      = {2018}
}

% arXiv preprint. Stored in the CoRR/eprint form used by the DBLP entry at the
% top of this file; the model name is braced to keep its camel case.
@article{howard2017mobilenets,
	author        = {Howard, Andrew G and Zhu, Menglong and Chen, Bo and Kalenichenko, Dmitry and Wang, Weijun and Weyand, Tobias and Andreetto, Marco and Adam, Hartwig},
	title         = {{MobileNets}: Efficient convolutional neural networks for mobile vision applications},
	journal       = {CoRR},
	volume        = {abs/1704.04861},
	year          = {2017},
	url           = {https://arxiv.org/abs/1704.04861},
	archivePrefix = {arXiv},
	eprint        = {1704.04861}
}

% Same work as the cisse2017parseval entry; the key is kept so existing
% citations still resolve. Prefer cisse2017parseval in new text.
% Family names come first ("Cisse, Moustapha"), and the venue and year are
% supplied so the entry has its required fields.
@inproceedings{MoustaphaParseval,
	author    = {Cisse, Moustapha and Bojanowski, Piotr and Grave, Edouard and Dauphin, Yann and Usunier, Nicolas},
	title     = {{Parseval} Networks: Improving Robustness to Adversarial Examples},
	booktitle = {Proceedings of the 34th International Conference on Machine Learning},
	series    = {Proceedings of Machine Learning Research},
	volume    = {70},
	pages     = {854--863},
	year      = {2017},
	publisher = {PMLR}
}

% ICASSP 2019 paper (IEEE Xplore export, cleaned up).
% Empty volume/number fields are dropped, the page range uses a double hyphen,
% and month uses the predefined three-letter macro.
@inproceedings{8683479,
	author    = {Wang, X. and Sun, S. and Shan, C. and Hou, J. and Xie, L. and Li, S. and Lei, X.},
	title     = {Adversarial Examples for Improving End-to-end Attention-based Small-footprint Keyword Spotting},
	booktitle = {ICASSP 2019 - 2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
	pages     = {6366--6370},
	year      = {2019},
	month     = may,
	doi       = {10.1109/ICASSP.2019.8683479},
	issn      = {2379-190X}
}

% Journal article introducing MLLR speaker adaptation.
% "Markov" is braced as a proper noun; the journal name is capitalised the same
% way as in the woodland2002large entry.
@article{leggetter1995maximum,
	author    = {Leggetter, Christopher J and Woodland, Philip C},
	title     = {Maximum likelihood linear regression for speaker adaptation of continuous density hidden {Markov} models},
	journal   = {Computer Speech \& Language},
	volume    = {9},
	number    = {2},
	pages     = {171--185},
	year      = {1995},
	publisher = {Academic Press}
}

% Book on noise-robust automatic speech recognition.
@book{virtanen2012techniques,
	author    = {Virtanen, Tuomas and Singh, Rita and Raj, Bhiksha},
	title     = {Techniques for noise robustness in automatic speech recognition},
	publisher = {John Wiley \& Sons},
	year      = {2012}
}

% CVPR 2018 paper on GAN-based font style transfer.
% The acronym is restored to upper case and braced; the proceedings name is in
% title case because styles do not recase booktitle.
@inproceedings{azadi2018multi,
	author    = {Azadi, Samaneh and Fisher, Matthew and Kim, Vladimir G and Wang, Zhaowen and Shechtman, Eli and Darrell, Trevor},
	title     = {Multi-content {GAN} for few-shot font style transfer},
	booktitle = {Proceedings of the {IEEE} Conference on Computer Vision and Pattern Recognition},
	pages     = {7564--7573},
	year      = {2018}
}

% Neurocomputing article on GAN-based medical image augmentation.
% Acronyms are braced so sentence-casing styles do not lower-case them.
@article{frid2018gan,
	author    = {Frid-Adar, Maayan and Diamant, Idit and Klang, Eyal and Amitai, Michal and Goldberger, Jacob and Greenspan, Hayit},
	title     = {{GAN}-based synthetic medical image augmentation for increased {CNN} performance in liver lesion classification},
	journal   = {Neurocomputing},
	volume    = {321},
	pages     = {321--331},
	year      = {2018},
	publisher = {Elsevier}
}
% ICML 2019 conference paper; typed as a proceedings paper because it carries a
% booktitle and no journal. Title commas are literal commas.
@inproceedings{qin2019imperceptible,
	author    = {Qin, Yao and Carlini, Nicholas and Cottrell, Garrison and Goodfellow, Ian and Raffel, Colin},
	title     = {Imperceptible, Robust, and Targeted Adversarial Examples for Automatic Speech Recognition},
	booktitle = {International Conference on Machine Learning},
	pages     = {5231--5240},
	year      = {2019}
}
% ICML 2006 paper introducing CTC; typed as a proceedings paper because it
% carries a booktitle and no journal.
% Authors are separated with " and "; accented letters use LaTeX escapes so the
% entry also works with 8-bit BibTeX.
@inproceedings{Graves2006Connectionist,
	author    = {Graves, Alex and Fern{\'a}ndez, Santiago and Gomez, Faustino and Schmidhuber, J{\"u}rgen},
	title     = {Connectionist Temporal Classification: Labelling Unsegmented Sequence Data with Recurrent Neural Networks},
	booktitle = {International Conference on Machine Learning},
	pages     = {369--376},
	year      = {2006}
}
% Interspeech 2018 paper on GAN-based monaural speech separation.
@inproceedings{Chen2018,
	title     = {Permutation Invariant Training of Generative Adversarial Network for Monaural Speech Separation},
	author    = {Lianwu Chen and Meng Yu and Yanmin Qian and Dan Su and Dong Yu},
	booktitle = {Proc. Interspeech 2018},
	pages     = {302--306},
	year      = {2018}
}
% Interspeech 2018 paper on adversarially trained acoustic models for synthesis.
@inproceedings{Lee2018,
	title     = {Acoustic Modeling Using Adversarially Trained Variational Recurrent Neural Network for Speech Synthesis},
	author    = {Joun Yeop Lee and Sung Jun Cheon and Byoung Jin Choi and Nam Soo Kim and Eunwoo Song},
	booktitle = {Proc. Interspeech 2018},
	pages     = {917--921},
	year      = {2018}
}
% Book chapter on the domain adaptation problem.
% The accented given name uses a LaTeX escape and the page range a double hyphen.
@incollection{REDKO201921,
	author    = {Redko, Ievgen and Habrard, Amaury and Morvant, Emilie and Sebban, Marc and Bennani, Youn{\`e}s},
	title     = {Domain Adaptation Problem},
	booktitle = {Advances in Domain Adaptation Theory},
	publisher = {Elsevier},
	pages     = {21--36},
	year      = {2019}
}
% CVPR 2017 paper (IEEE Xplore export, cleaned up).
% Empty volume/number fields are dropped, the page range uses a double hyphen,
% and month uses the predefined three-letter macro.
@inproceedings{8099799,
	author    = {Tzeng, Eric and Hoffman, Judy and Saenko, Kate and Darrell, Trevor},
	title     = {Adversarial Discriminative Domain Adaptation},
	booktitle = {2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
	pages     = {2962--2971},
	year      = {2017},
	month     = jul
}

% Textbook on digital signal processing.
% A publisher is supplied because it is a required field for a book entry.
% NOTE(review): C. K. Yuen is often attached to this book by citation exporters
% but appears to be the author of a review of it, not a co-author -- confirm
% against the title page before keeping the third name.
@book{rabiner1975theory,
	author    = {Rabiner, L. R. and Gold, B. and Yuen, C. K.},
	title     = {Theory and Application of Digital Signal Processing},
	publisher = {Prentice-Hall},
	year      = {1975}
}
